Requires 02_motif_extract_run_nov_2019_no_coverage_filtering_postproc.Rmd to be run first.

Motifs are classified as methylated or unmethylated and counted, without aggregating average methylation values per sample to avoid batch/genotype effects.

Started 11 March 2020

Config

Expand

Libraries, knitr options, permutation numbers, threads.

opts_chunk$set(fig.width = 5,
               fig.height = 5,
               dpi = 200,
               cache = FALSE,
               include = TRUE,
               cache.lazy = FALSE,
               warning = TRUE,
               message = TRUE)
options(bitmapType="cairo")
ac <- function(col, alpha=1){

  apply(sapply(col, col2rgb)/255, 2, 
                     function(x) 
                       rgb(x[1], x[2], x[3], alpha=alpha))  
}

get_palette <- function(alpha = alpha) {
    data.frame(
        nucleotide = c('G', 'C', 'A', 'T'),
        color = ac(c('#F6A318', '#2B4C9B',  '#68B42E', '#E52521'),
                   alpha),
        stringsAsFactors = FALSE)
}

color_by_nucleotide_and_position <- function(motifs, start, alpha){
    
    palette <- get_palette(alpha = alpha)
    
    tmp <- data.frame(motif = as.character(motifs),
                      nucleotide = substr(as.character(motifs), start, start ))

    tmp2 <- merge(tmp, palette, by.x = 'nucleotide', by.y = 'nucleotide', all.x = TRUE)
    rownames(tmp2) <- tmp2$motif

    tmp3 <- ac(tmp2[as.character(motifs), 'color'], alpha = alpha)
    names(tmp3) <- as.character(tmp2[as.character(motifs), 'nucleotide'])
    return(tmp3)  
}

color_by_nucleotide_and_position(c('ACG', 'CGT'), 1, 1)
##           A           C 
## "#68B42EFF" "#2B4C9BFF"

Data retrieval

The object was generated by 02_motif_extract_run_nov_2019_no_coverage_filtering_postproc.Rmd

con <- url('http://imlstaupo.uzh.ch/imallona/baubec/cg_context/counts_nested_list.RData')
load(con)
close(con)

CG context

Data reshaping

tko+d3a2_merged CG

stopifnot(all(d$cg$`tko+d3a2_merged`$meth$motif == d$cg$`tko+d3a2_merged`$unmeth$motif))

a <- data.frame(motif = d$cg$`tko+d3a2_merged`$meth$motif,
                meth = d$cg$`tko+d3a2_merged`$meth$count,
                unmeth = d$cg$`tko+d3a2_merged`$unmeth$count)

rownames(a) <- a$motif
a$score <- a$meth / (a$meth + a$unmeth)
a$cov <- a$meth + a$unmeth

## shuffling for plotting later
set.seed(2)
a <- a[sample(rownames(a), nrow(a), replace = FALSE),]
DT::datatable(a %>% as.data.frame() %>% 
              dplyr::mutate_if(is.numeric, funs(round(., 2))), 
              extensions = c("Buttons", "FixedColumns"),
              rownames = FALSE, 
              options = list(dom = "Bfrtip",
                             scrollX = TRUE, 
                             fixedColumns = list(leftColumns = 1),
                             buttons = c("csv", "excel")))

tko+d3b1_merged CG

b <- data.frame(motif = d$cg$`tko+d3b1_merged`$meth$motif,
                meth = d$cg$`tko+d3b1_merged`$meth$count,
                unmeth = d$cg$`tko+d3b1_merged`$unmeth$count)

rownames(b) <- b$motif
b$score <- b$meth / (b$meth + b$unmeth)
b$cov <- b$meth + b$unmeth

## shuffling for plotting later
set.seed(2)
b <- b[sample(rownames(b), nrow(b), replace = FALSE),]
DT::datatable(b %>% as.data.frame() %>% 
              dplyr::mutate_if(is.numeric, funs(round(., 2))), 
              extensions = c("Buttons", "FixedColumns"),
              rownames = FALSE, 
              options = list(dom = "Bfrtip",
                             scrollX = TRUE, 
                             fixedColumns = list(leftColumns = 1),
                             buttons = c("csv", "excel")))

All integrated

stopifnot(a$motif == b$motif)

fd  <- data.frame(motif = a$motif,
                  meth_dnmt3a = a$meth,
                  unmeth_dnmt3a = a$unmeth,
                  cov_dnmt3a = a$cov,
                  score_dnmt3a = a$score,                  
                  meth_dnmt3b = b$meth,
                  unmeth_dnmt3b = b$unmeth,
                  cov_dnmt3b = b$cov,
                  score_dnmt3b = b$score)

Suggested plot for CG context

MA plot with the log2 ratio of the scores (y axes) and the log10-transformed coverage average (x axes).

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fd$motif,
                                             start = i, alpha = alpha)
    
    cols[1:3]

    fd[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fd$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    p[[as.character(i)]] <- ggplot(
        fd,
        aes(y = log2(score_dnmt3a/score_dnmt3b),
            x = (cov_dnmt3a + cov_dnmt3b)/2,
            colour = get(sprintf('nt_%s', i)))) +
        geom_point() +
        col_scale +
        scale_x_continuous(trans='log10') +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")        

    p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                       margins = 'y',
                                       groupColour = TRUE,
                                       groupFill = TRUE)

}
## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).
do.call("grid.arrange", c(p, ncol=3))

PDF version with half the point size

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fd$motif,
                                             start = i, alpha = alpha)
    
    cols[1:3]

    fd[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fd$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    p[[as.character(i)]] <- ggplot(
        fd,
        aes(y = log2(score_dnmt3a/score_dnmt3b),
            x = (cov_dnmt3a + cov_dnmt3b)/2,
            colour = get(sprintf('nt_%s', i)))) +
        geom_point(size = 0.5) +
        col_scale +
        scale_x_continuous(trans='log10') +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")        

    p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                       margins = 'y',
                                       groupColour = TRUE,
                                       groupFill = TRUE)

}
## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).
arranged <- do.call("grid.arrange", c(p, ncol=3))

ggsave(filename = 'cg_context.pdf', plot = arranged,
       width = 10, height = 10)

Cs and Gs (positions 0 and 1) omitted for the sake of brevity (see below; Mark’s suggestion).

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fd$motif,
                                             start = i, alpha = alpha)
    
    cols[1:3]

    fd[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fd$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    if (!(all(as.character( fd[,sprintf('nt_%i', i)]) == 'C') |
          all(as.character( fd[,sprintf('nt_%i', i)]) == 'G'))) {
        p[[as.character(i)]] <- ggplot(
            fd,
            aes(y = log2(score_dnmt3a/score_dnmt3b),
                x = (cov_dnmt3a + cov_dnmt3b)/2,
                colour = get(sprintf('nt_%s', i)))) +
            geom_point() +
            col_scale +
            scale_x_continuous(trans='log10') +
            xlab('average coverage') +
            ylab('log2 (DNMT3A/DNMT3B) scores') +
            ggtitle(sprintf("Nucleotide %s", i-4)) +
            theme(legend.position = "none")        

        p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                           margins = 'y',
                                           groupColour = TRUE,
                                           groupFill = TRUE)
    }
}
## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).
do.call("grid.arrange", c(p, ncol=3))

Other plots CG

log2 scores vs mean coverage

stopifnot(a$motif == b$motif)
p <- list()

alpha = 0.3

col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


for (i in 1:8) {
    cols <- color_by_nucleotide_and_position(fd$motif,
                                             start = i, alpha = alpha)
    
    cols[1:3]

    fd[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fd$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    
    p[[i]] <- ggplot(fd, aes(y = log2(score_dnmt3a/score_dnmt3b),
                             x = (cov_dnmt3a + cov_dnmt3b)/2,
                             colour = get(sprintf('nt_%s', i)))) +
        geom_point() +
        col_scale +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")

    p[[i]] <- ggMarginal(p[[i]], groupColour = TRUE, groupFill = TRUE)

}
## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).

## Warning: Removed 7 rows containing non-finite values (stat_density).
do.call("grid.arrange", c(p, ncol=3))

delta vs mean coverage

stopifnot(a$motif == b$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = a$score - b$score,
         x = (a$cov + b$cov)/2,
         col = color_by_nucleotide_and_position(a$motif, start = i, alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "mean coverage",
         main = sprintf('position %s', i))
}

log2 scores vs DNMT3 A score

stopifnot(a$motif == b$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = log2(a$score/b$score),
         x = a$score,
         col = color_by_nucleotide_and_position(a$motif, start = i, alpha = 0.5),
         pch = 19,
         ylab = "log2(DNMT3A score / DNMT3B score)",
         xlab = "DNMT3A score",
         main = sprintf('position %s', i))
}

delta vs DNMT3 A score

stopifnot(a$motif == b$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = a$score - b$score,
         x = a$score,
         col = color_by_nucleotide_and_position(a$motif, start = i, alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "DNMT3A score",
         main = sprintf('position %s', i))
}

log2 scores vs DNMT3 B score

stopifnot(a$motif == b$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = log2(a$score/b$score),
         x = b$score,
         col = color_by_nucleotide_and_position(a$motif, start = i, alpha = 0.5),
         pch = 19,
         ylab = "log2(DNMT3A score / DNMT3B score)",
         xlab = "DNMT3B score",
         main = sprintf('position %s', i))
}

delta vs DNMT3 B score

stopifnot(a$motif == b$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = a$score - b$score,
         x = b$score,
         col = color_by_nucleotide_and_position(a$motif, start = i, alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "DNMT3B score",
         main = sprintf('position %s', i))
}

rm(a, b, fd)

CH context

Data reshaping

tko+d3a2_merged CH

stopifnot(all(d$ch$`tko+d3a2_merged`$meth$motif == d$ch$`tko+d3a2_merged`$unmeth$motif))

ha <- data.frame(motif = d$ch$`tko+d3a2_merged`$meth$motif,
                meth = d$ch$`tko+d3a2_merged`$meth$count,
                unmeth = d$ch$`tko+d3a2_merged`$unmeth$count)

rownames(ha) <- ha$motif
ha$score <- ha$meth / (ha$meth + ha$unmeth)
ha$cov <- ha$meth + ha$unmeth

## shuffling for plotting later
set.seed(2)
ha <- ha[sample(rownames(ha), nrow(ha), replace = FALSE),]
DT::datatable(ha %>% as.data.frame() %>% 
              dplyr::mutate_if(is.numeric, funs(round(., 2))), 
              extensions = c("Buttons", "FixedColumns"),
              rownames = FALSE, 
              options = list(dom = "Bfrtip",
                             scrollX = TRUE, 
                             fixedColumns = list(leftColumns = 1),
                             buttons = c("csv", "excel")))

tko+d3b1_merged CH

hb <- data.frame(motif = d$ch$`tko+d3b1_merged`$meth$motif,
                meth = d$ch$`tko+d3b1_merged`$meth$count,
                unmeth = d$ch$`tko+d3b1_merged`$unmeth$count)

rownames(hb) <- hb$motif
hb$score <- hb$meth / (hb$meth + hb$unmeth)
hb$cov <- hb$meth + hb$unmeth

## shuffling for plotting later
set.seed(2)
hb <- hb[sample(rownames(hb), nrow(hb), replace = FALSE),]
DT::datatable(hb %>% as.data.frame() %>% 
              dplyr::mutate_if(is.numeric, funs(round(., 2))), 
              extensions = c("Buttons", "FixedColumns"),
              rownames = FALSE, 
              options = list(dom = "Bfrtip",
                             scrollX = TRUE, 
                             fixedColumns = list(leftColumns = 1),
                             buttons = c("csv", "excel")))

All integrated

stopifnot(ha$motif == hb$motif)

fdh  <- data.frame(motif = ha$motif,
                  meth_dnmt3a = ha$meth,
                  unmeth_dnmt3a = ha$unmeth,
                  cov_dnmt3a = ha$cov,
                  score_dnmt3a = ha$score,                  
                  meth_dnmt3b = hb$meth,
                  unmeth_dnmt3b = hb$unmeth,
                  cov_dnmt3b = hb$cov,
                  score_dnmt3b = hb$score)

Suggested plot for CH

MA plot with the log2 ratio of the scores (y axes) and the log10-transformed coverage average (x axes).

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fdh$motif,
                                             start = i, alpha = alpha)
    

    fdh[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fdh$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    p[[as.character(i)]] <- ggplot(
        fdh,
        aes(y = log2(score_dnmt3a/score_dnmt3b),
            x = (cov_dnmt3a + cov_dnmt3b)/2,
            colour = get(sprintf('nt_%s', i)))) +
        geom_point() +
        col_scale +
        scale_x_continuous(trans='log10') +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")

    p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                       margins = 'y',
                                       groupColour = TRUE,
                                       groupFill = TRUE)
    
}
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
do.call("grid.arrange", c(p, ncol=3))

PDF version with half the point size

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fdh$motif,
                                             start = i, alpha = alpha)
    

    fdh[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fdh$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    p[[as.character(i)]] <- ggplot(
        fdh,
        aes(y = log2(score_dnmt3a/score_dnmt3b),
            x = (cov_dnmt3a + cov_dnmt3b)/2,
            colour = get(sprintf('nt_%s', i)))) +
        geom_point(size = 0.5) +
        col_scale +
        scale_x_continuous(trans='log10') +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")

    p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                       margins = 'y',
                                       groupColour = TRUE,
                                       groupFill = TRUE)
    
}
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
arranged <- do.call("grid.arrange", c(p, ncol=3))

ggsave(filename = 'ch_context.pdf', plot = arranged,
       width = 10, height = 10)

Plus a variant with the coverage margins

alpha = 1
col_scale <- scale_colour_manual(
    name = sprintf(""),
    labels = get_palette(alpha = alpha)$nucleotide,
    values = get_palette(alpha = alpha)$color,
    drop = FALSE)


p <- list()
for (i in 1:8) {
    alpha = 0.3
    cols <- color_by_nucleotide_and_position(fdh$motif,
                                             start = i, alpha = alpha)
    

    fdh[,sprintf('nt_%i', i)] <- factor(substr(
        as.character(fdh$motif), i, i),
        levels = get_palette(alpha = alpha)$nucleotide)

    p[[as.character(i)]] <- ggplot(
        fdh,
        aes(y = log2(score_dnmt3a/score_dnmt3b),
            x = (cov_dnmt3a + cov_dnmt3b)/2,
            colour = get(sprintf('nt_%s', i)))) +
        geom_point() +
        col_scale +
        scale_x_continuous(trans='log10') +
        xlab('average coverage') +
        ylab('log2 (DNMT3A/DNMT3B) scores') +
        ggtitle(sprintf("Nucleotide %s", i-4)) +
        theme(legend.position = "none")

    p[[as.character(i)]] <- ggMarginal(p[[as.character(i)]],
                                       groupColour = TRUE,
                                       groupFill = TRUE)
    
}
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
## Warning: Removed 22 rows containing missing values (geom_point).

## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 247 rows containing non-finite values (stat_density).

## Warning: Removed 247 rows containing non-finite values (stat_density).
do.call("grid.arrange", c(p, ncol=3))

Other plots CH

stopifnot(ha$motif == hb$motif)

delta vs mean coverage

stopifnot(ha$motif == hb$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = ha$score - hb$score,
         x = (ha$cov + hb$cov)/2,
         col = color_by_nucleotide_and_position(ha$motif, start = i,
                                                alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "mean coverage",
         main = sprintf('position %s', i))
}

log2 scores vs DNMT3 A score

stopifnot(ha$motif == hb$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = log2(ha$score/hb$score),
         x = ha$score,
         col = color_by_nucleotide_and_position(ha$motif, start = i,
                                                alpha = 0.5),
         pch = 19,
         ylab = "log2(DNMT3A score / DNMT3B score)",
         xlab = "DNMT3A score",
         main = sprintf('position %s', i))
}

delta vs DNMT3 A score

stopifnot(ha$motif == hb$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = ha$score - hb$score,
         x = ha$score,
         col = color_by_nucleotide_and_position(ha$motif,
                                                start = i, alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "DNMT3A score",
         main = sprintf('position %s', i))
}

log2 scores vs DNMT3 B score

stopifnot(ha$motif == hb$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = log2(ha$score/hb$score),
         x = hb$score,
         col = color_by_nucleotide_and_position(ha$motif, start = i,
                                                alpha = 0.5),
         pch = 19,
         ylab = "log2(DNMT3A score / DNMT3B score)",
         xlab = "DNMT3B score",
         main = sprintf('position %s', i))
}

delta vs DNMT3 B score

stopifnot(ha$motif == hb$motif)

par(mfrow = c(3,3), pty = 's')

for (i in 1:8){
    plot( y = ha$score - hb$score,
         x = hb$score,
         col = color_by_nucleotide_and_position(ha$motif,
                                                start = i, alpha = 0.5),
         pch = 19,
         ylab = "DNMT3A score - DNMT3B score",
         xlab = "DNMT3B score",
         main = sprintf('position %s', i))
}

Timestamp

date()
## [1] "Mon Mar 16 13:49:48 2020"
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /home/imallona/soft/R/R-3.6.1/lib/libRblas.so
## LAPACK: /home/imallona/soft/R/R-3.6.1/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] gridExtra_2.3 dplyr_0.8.3   ggExtra_0.8   DT_0.7        ggplot2_3.2.1
## [6] Cairo_1.5-10  knitr_1.23   
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_0.2.5  xfun_0.8          remotes_2.1.0    
##  [4] purrr_0.3.3       testthat_2.2.1    colorspace_1.4-1 
##  [7] usethis_1.5.1     miniUI_0.1.1.1    htmltools_0.3.6  
## [10] yaml_2.2.0        rlang_0.4.5       pkgbuild_1.0.4   
## [13] pillar_1.4.2      later_0.8.0       glue_1.3.1       
## [16] withr_2.1.2       sessioninfo_1.1.1 stringr_1.4.0    
## [19] munsell_0.5.0     gtable_0.3.0      htmlwidgets_1.3  
## [22] devtools_2.1.0    codetools_0.2-16  evaluate_0.14    
## [25] memoise_1.1.0     labeling_0.3      callr_3.3.1      
## [28] crosstalk_1.0.0   httpuv_1.5.1      ps_1.3.0         
## [31] Rcpp_1.0.1        xtable_1.8-4      backports_1.1.4  
## [34] scales_1.0.0      promises_1.0.1    desc_1.2.0       
## [37] pkgload_1.0.2     jsonlite_1.6      fs_1.3.1         
## [40] mime_0.7          digest_0.6.20     stringi_1.4.3    
## [43] processx_3.4.1    shiny_1.3.2       rprojroot_1.3-2  
## [46] grid_3.6.1        cli_1.1.0         tools_3.6.1      
## [49] magrittr_1.5      lazyeval_0.2.2    tibble_2.1.3     
## [52] crayon_1.3.4      pkgconfig_2.0.2   prettyunits_1.0.2
## [55] assertthat_0.2.1  rmarkdown_1.14    R6_2.4.0         
## [58] compiler_3.6.1
devtools::session_info()
## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.1 (2019-07-05)
##  os       Ubuntu 16.04.6 LTS          
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language en_US                       
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       Europe/Madrid               
##  date     2020-03-16                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package     * version date       lib source        
##  assertthat    0.2.1   2019-03-21 [1] CRAN (R 3.6.1)
##  backports     1.1.4   2019-04-10 [1] CRAN (R 3.6.1)
##  Cairo       * 1.5-10  2019-03-28 [1] CRAN (R 3.6.1)
##  callr         3.3.1   2019-07-18 [1] CRAN (R 3.6.1)
##  cli           1.1.0   2019-03-19 [1] CRAN (R 3.6.1)
##  codetools     0.2-16  2018-12-24 [1] CRAN (R 3.6.1)
##  colorspace    1.4-1   2019-03-18 [1] CRAN (R 3.6.1)
##  crayon        1.3.4   2017-09-16 [1] CRAN (R 3.6.1)
##  crosstalk     1.0.0   2016-12-21 [1] CRAN (R 3.6.1)
##  desc          1.2.0   2018-05-01 [1] CRAN (R 3.6.1)
##  devtools      2.1.0   2019-07-06 [1] CRAN (R 3.6.1)
##  digest        0.6.20  2019-07-04 [1] CRAN (R 3.6.1)
##  dplyr       * 0.8.3   2019-07-04 [1] CRAN (R 3.6.1)
##  DT          * 0.7     2019-06-11 [1] CRAN (R 3.6.1)
##  evaluate      0.14    2019-05-28 [1] CRAN (R 3.6.1)
##  fs            1.3.1   2019-05-06 [1] CRAN (R 3.6.1)
##  ggExtra     * 0.8     2018-04-04 [1] CRAN (R 3.6.1)
##  ggplot2     * 3.2.1   2019-08-10 [1] CRAN (R 3.6.1)
##  glue          1.3.1   2019-03-12 [1] CRAN (R 3.6.1)
##  gridExtra   * 2.3     2017-09-09 [1] CRAN (R 3.6.1)
##  gtable        0.3.0   2019-03-25 [1] CRAN (R 3.6.1)
##  htmltools     0.3.6   2017-04-28 [1] CRAN (R 3.6.1)
##  htmlwidgets   1.3     2018-09-30 [1] CRAN (R 3.6.1)
##  httpuv        1.5.1   2019-04-05 [1] CRAN (R 3.6.1)
##  jsonlite      1.6     2018-12-07 [1] CRAN (R 3.6.1)
##  knitr       * 1.23    2019-05-18 [1] CRAN (R 3.6.1)
##  labeling      0.3     2014-08-23 [1] CRAN (R 3.6.1)
##  later         0.8.0   2019-02-11 [1] CRAN (R 3.6.1)
##  lazyeval      0.2.2   2019-03-15 [1] CRAN (R 3.6.1)
##  magrittr      1.5     2014-11-22 [1] CRAN (R 3.6.1)
##  memoise       1.1.0   2017-04-21 [1] CRAN (R 3.6.1)
##  mime          0.7     2019-06-11 [1] CRAN (R 3.6.1)
##  miniUI        0.1.1.1 2018-05-18 [1] CRAN (R 3.6.1)
##  munsell       0.5.0   2018-06-12 [1] CRAN (R 3.6.1)
##  pillar        1.4.2   2019-06-29 [1] CRAN (R 3.6.1)
##  pkgbuild      1.0.4   2019-08-05 [1] CRAN (R 3.6.1)
##  pkgconfig     2.0.2   2018-08-16 [1] CRAN (R 3.6.1)
##  pkgload       1.0.2   2018-10-29 [1] CRAN (R 3.6.1)
##  prettyunits   1.0.2   2015-07-13 [1] CRAN (R 3.6.1)
##  processx      3.4.1   2019-07-18 [1] CRAN (R 3.6.1)
##  promises      1.0.1   2018-04-13 [1] CRAN (R 3.6.1)
##  ps            1.3.0   2018-12-21 [1] CRAN (R 3.6.1)
##  purrr         0.3.3   2019-10-18 [1] CRAN (R 3.6.1)
##  R6            2.4.0   2019-02-14 [1] CRAN (R 3.6.1)
##  Rcpp          1.0.1   2019-03-17 [1] CRAN (R 3.6.1)
##  remotes       2.1.0   2019-06-24 [1] CRAN (R 3.6.1)
##  rlang         0.4.5   2020-03-01 [1] CRAN (R 3.6.1)
##  rmarkdown     1.14    2019-07-12 [1] CRAN (R 3.6.1)
##  rprojroot     1.3-2   2018-01-03 [1] CRAN (R 3.6.1)
##  scales        1.0.0   2018-08-09 [1] CRAN (R 3.6.1)
##  sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 3.6.1)
##  shiny         1.3.2   2019-04-22 [1] CRAN (R 3.6.1)
##  stringi       1.4.3   2019-03-12 [1] CRAN (R 3.6.1)
##  stringr       1.4.0   2019-02-10 [1] CRAN (R 3.6.1)
##  testthat      2.2.1   2019-07-25 [1] CRAN (R 3.6.1)
##  tibble        2.1.3   2019-06-06 [1] CRAN (R 3.6.1)
##  tidyselect    0.2.5   2018-10-11 [1] CRAN (R 3.6.1)
##  usethis       1.5.1   2019-07-04 [1] CRAN (R 3.6.1)
##  withr         2.1.2   2018-03-15 [1] CRAN (R 3.6.1)
##  xfun          0.8     2019-06-25 [1] CRAN (R 3.6.1)
##  xtable        1.8-4   2019-04-21 [1] CRAN (R 3.6.1)
##  yaml          2.2.0   2018-07-25 [1] CRAN (R 3.6.1)
## 
## [1] /home/imallona/soft/R/R-3.6.1/library